{
  "cells": [
    {
      "cell_type": "markdown",
      "source": [
        "# Multiple Linear Regression Part 2"
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "Multiple Linear Regression (MLR) is used for more than 1 variables or features to find the relationship by fitting a linear equation.   \n",
        "\n",
        "Y: 1 continuous target variable     \n",
        "X: 2 or more predictor variables        \n",
        "b0: intercept (X=0)     \n",
        "b1: the coefficient or parameter of x1   \n",
        "b2: the coefficient of parameter x2 and so on...        \n",
        "\n\nTo find the the parameter or coefficients for multiple linear regression with very large dataset and high dimensionality., you should use optimization approach ."
      ],
      "metadata": {}
    },
    {
      "cell_type": "markdown",
      "source": [
        "Data Preprocessing"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "# Importing the libraries\n",
        "import pandas as pd\n",
        "import numpy as np\n",
        "import matplotlib.pyplot as plt\n",
        "\n",
        "import warnings\n",
        "warnings.filterwarnings(\"ignore\")\n",
        "\n",
        "# fix_yahoo_finance is used to fetch data \n",
        "import fix_yahoo_finance as yf\n",
        "yf.pdr_override()"
      ],
      "outputs": [],
      "execution_count": 1,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "# input\n",
        "symbol = 'AMD'\n",
        "start = '2014-01-01'\n",
        "end = '2018-08-27'\n",
        "\n",
        "# Read data \n",
        "dataset = yf.download(symbol,start,end)\n",
        "\n",
        "# View columns \n",
        "dataset.head()"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[*********************100%***********************]  1 of 1 downloaded\n"
          ]
        },
        {
          "output_type": "execute_result",
          "execution_count": 2,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-02</th>\n",
              "      <td>3.85</td>\n",
              "      <td>3.98</td>\n",
              "      <td>3.84</td>\n",
              "      <td>3.95</td>\n",
              "      <td>3.95</td>\n",
              "      <td>20548400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume\n",
              "Date                                                    \n",
              "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 2,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "dataset.describe()"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 3,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>count</th>\n",
              "      <td>1172.000000</td>\n",
              "      <td>1172.000000</td>\n",
              "      <td>1172.000000</td>\n",
              "      <td>1172.000000</td>\n",
              "      <td>1172.000000</td>\n",
              "      <td>1.172000e+03</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>mean</th>\n",
              "      <td>7.013959</td>\n",
              "      <td>7.161143</td>\n",
              "      <td>6.868012</td>\n",
              "      <td>7.015776</td>\n",
              "      <td>7.015776</td>\n",
              "      <td>3.785747e+07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>std</th>\n",
              "      <td>4.880564</td>\n",
              "      <td>4.985040</td>\n",
              "      <td>4.779099</td>\n",
              "      <td>4.887464</td>\n",
              "      <td>4.887464</td>\n",
              "      <td>3.456219e+07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>min</th>\n",
              "      <td>1.620000</td>\n",
              "      <td>1.690000</td>\n",
              "      <td>1.610000</td>\n",
              "      <td>1.620000</td>\n",
              "      <td>1.620000</td>\n",
              "      <td>0.000000e+00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>25%</th>\n",
              "      <td>2.710000</td>\n",
              "      <td>2.780000</td>\n",
              "      <td>2.660000</td>\n",
              "      <td>2.707500</td>\n",
              "      <td>2.707500</td>\n",
              "      <td>1.308202e+07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>50%</th>\n",
              "      <td>4.250000</td>\n",
              "      <td>4.350000</td>\n",
              "      <td>4.175000</td>\n",
              "      <td>4.275000</td>\n",
              "      <td>4.275000</td>\n",
              "      <td>2.900380e+07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>75%</th>\n",
              "      <td>11.585000</td>\n",
              "      <td>11.785000</td>\n",
              "      <td>11.310000</td>\n",
              "      <td>11.555000</td>\n",
              "      <td>11.555000</td>\n",
              "      <td>5.064272e+07</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>max</th>\n",
              "      <td>24.940001</td>\n",
              "      <td>27.299999</td>\n",
              "      <td>24.629999</td>\n",
              "      <td>25.260000</td>\n",
              "      <td>25.260000</td>\n",
              "      <td>3.250584e+08</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "              Open         High          Low        Close    Adj Close  \\\n",
              "count  1172.000000  1172.000000  1172.000000  1172.000000  1172.000000   \n",
              "mean      7.013959     7.161143     6.868012     7.015776     7.015776   \n",
              "std       4.880564     4.985040     4.779099     4.887464     4.887464   \n",
              "min       1.620000     1.690000     1.610000     1.620000     1.620000   \n",
              "25%       2.710000     2.780000     2.660000     2.707500     2.707500   \n",
              "50%       4.250000     4.350000     4.175000     4.275000     4.275000   \n",
              "75%      11.585000    11.785000    11.310000    11.555000    11.555000   \n",
              "max      24.940001    27.299999    24.629999    25.260000    25.260000   \n",
              "\n",
              "             Volume  \n",
              "count  1.172000e+03  \n",
              "mean   3.785747e+07  \n",
              "std    3.456219e+07  \n",
              "min    0.000000e+00  \n",
              "25%    1.308202e+07  \n",
              "50%    2.900380e+07  \n",
              "75%    5.064272e+07  \n",
              "max    3.250584e+08  "
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 3,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Explore Features"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "features = dataset[['Open','High','Low','Close','Adj Close','Volume']]\n",
        "features.head(9)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 4,
          "data": {
            "text/html": [
              "<div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Open</th>\n",
              "      <th>High</th>\n",
              "      <th>Low</th>\n",
              "      <th>Close</th>\n",
              "      <th>Adj Close</th>\n",
              "      <th>Volume</th>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>Date</th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "      <th></th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>2014-01-02</th>\n",
              "      <td>3.85</td>\n",
              "      <td>3.98</td>\n",
              "      <td>3.84</td>\n",
              "      <td>3.95</td>\n",
              "      <td>3.95</td>\n",
              "      <td>20548400</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-03</th>\n",
              "      <td>3.98</td>\n",
              "      <td>4.00</td>\n",
              "      <td>3.88</td>\n",
              "      <td>4.00</td>\n",
              "      <td>4.00</td>\n",
              "      <td>22887200</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-06</th>\n",
              "      <td>4.01</td>\n",
              "      <td>4.18</td>\n",
              "      <td>3.99</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>42398300</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-07</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.25</td>\n",
              "      <td>4.11</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>42932100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-08</th>\n",
              "      <td>4.23</td>\n",
              "      <td>4.26</td>\n",
              "      <td>4.14</td>\n",
              "      <td>4.18</td>\n",
              "      <td>4.18</td>\n",
              "      <td>30678700</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-09</th>\n",
              "      <td>4.20</td>\n",
              "      <td>4.23</td>\n",
              "      <td>4.05</td>\n",
              "      <td>4.09</td>\n",
              "      <td>4.09</td>\n",
              "      <td>30667600</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-10</th>\n",
              "      <td>4.09</td>\n",
              "      <td>4.20</td>\n",
              "      <td>4.07</td>\n",
              "      <td>4.17</td>\n",
              "      <td>4.17</td>\n",
              "      <td>20840800</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-13</th>\n",
              "      <td>4.19</td>\n",
              "      <td>4.20</td>\n",
              "      <td>4.09</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.13</td>\n",
              "      <td>22856100</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2014-01-14</th>\n",
              "      <td>4.14</td>\n",
              "      <td>4.30</td>\n",
              "      <td>4.13</td>\n",
              "      <td>4.30</td>\n",
              "      <td>4.30</td>\n",
              "      <td>42434800</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>"
            ],
            "text/plain": [
              "            Open  High   Low  Close  Adj Close    Volume\n",
              "Date                                                    \n",
              "2014-01-02  3.85  3.98  3.84   3.95       3.95  20548400\n",
              "2014-01-03  3.98  4.00  3.88   4.00       4.00  22887200\n",
              "2014-01-06  4.01  4.18  3.99   4.13       4.13  42398300\n",
              "2014-01-07  4.19  4.25  4.11   4.18       4.18  42932100\n",
              "2014-01-08  4.23  4.26  4.14   4.18       4.18  30678700\n",
              "2014-01-09  4.20  4.23  4.05   4.09       4.09  30667600\n",
              "2014-01-10  4.09  4.20  4.07   4.17       4.17  20840800\n",
              "2014-01-13  4.19  4.20  4.09   4.13       4.13  22856100\n",
              "2014-01-14  4.14  4.30  4.13   4.30       4.30  42434800"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 4,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "target = dataset['Adj Close']\n",
        "target.head(9)"
      ],
      "outputs": [
        {
          "output_type": "execute_result",
          "execution_count": 5,
          "data": {
            "text/plain": [
              "Date\n",
              "2014-01-02    3.95\n",
              "2014-01-03    4.00\n",
              "2014-01-06    4.13\n",
              "2014-01-07    4.18\n",
              "2014-01-08    4.18\n",
              "2014-01-09    4.09\n",
              "2014-01-10    4.17\n",
              "2014-01-13    4.13\n",
              "2014-01-14    4.30\n",
              "Name: Adj Close, dtype: float64"
            ]
          },
          "metadata": {}
        }
      ],
      "execution_count": 5,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "msk = np.random.rand(len(dataset)) < 0.8\n",
        "train = features[msk]\n",
        "test = features[~msk]"
      ],
      "outputs": [],
      "execution_count": 6,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "markdown",
      "source": [
        "Multiple Regression Model"
      ],
      "metadata": {}
    },
    {
      "cell_type": "code",
      "source": [
        "from sklearn import linear_model\n",
        "regr = linear_model.LinearRegression()\n",
        "x = np.asanyarray(train[['Open','High','Low']])\n",
        "y = np.asanyarray(train[['Adj Close']])\n",
        "regr.fit (x, y)\n",
        "# The coefficients\n",
        "print ('Coefficients: ', regr.coef_)"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Coefficients:  [[-0.51741512  0.71866713  0.80080015]]\n"
          ]
        }
      ],
      "execution_count": 7,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "y_pred = regr.predict(test[['Open','High','Low']])\n",
        "x = np.asanyarray(test[['Open','High','Low']])\n",
        "y = np.asanyarray(test[['Adj Close']])\n",
        "print(\"Residual sum of squares: %.2f\"\n",
        "      % np.mean((y_pred - y) ** 2))\n",
        "\n",
        "# Explained variance score: 1 is perfect prediction\n",
        "print('Variance score: %.2f' % regr.score(x, y))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Residual sum of squares: 0.01\n",
            "Variance score: 1.00\n"
          ]
        }
      ],
      "execution_count": 8,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    },
    {
      "cell_type": "code",
      "source": [
        "regr = linear_model.LinearRegression()\n",
        "x = np.asanyarray(train[['Open','High','Low','Volume']])\n",
        "y = np.asanyarray(train[['Adj Close']])\n",
        "regr.fit (x, y)\n",
        "print ('Coefficients: ', regr.coef_)\n",
        "y_= regr.predict(test[['Open','High','Low','Volume']])\n",
        "x = np.asanyarray(test[['Open','High','Low','Volume']])\n",
        "y = np.asanyarray(test[['Adj Close']])\n",
        "print(\"Residual sum of squares: %.2f\"% np.mean((y_ - y) ** 2))\n",
        "print('Variance score: %.2f' % regr.score(x, y))"
      ],
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Coefficients:  [[-5.28488950e-01  8.11577802e-01  7.19334192e-01 -8.48431352e-10]]\n",
            "Residual sum of squares: 0.01\n",
            "Variance score: 1.00\n"
          ]
        }
      ],
      "execution_count": 9,
      "metadata": {
        "collapsed": false,
        "outputHidden": false,
        "inputHidden": false
      }
    }
  ],
  "metadata": {
    "kernel_info": {
      "name": "python3"
    },
    "language_info": {
      "nbconvert_exporter": "python",
      "file_extension": ".py",
      "name": "python",
      "version": "3.5.5",
      "mimetype": "text/x-python",
      "pygments_lexer": "ipython3",
      "codemirror_mode": {
        "name": "ipython",
        "version": 3
      }
    },
    "kernelspec": {
      "name": "python3",
      "language": "python",
      "display_name": "Python 3"
    },
    "nteract": {
      "version": "0.12.2"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 4
}